License

The R markdown code that generated this document is licensed under an MIT software license.

The document itself, and all graphs, tables, figures and text within it, is licensed under a Creative Commons Attribution 4.0 International license (CC-BY).

Reading in and merging the data.

# set working directory, load libraries, set-up
#wd<-"/Users/marc/work/MLW_LSTM/CRYPTOFAZ"
#setwd(wd)

outDir<-paste(sep="","../RESULTS/",gsub(pattern="-",replacement="",Sys.Date()))
inDir<-"../../DATA/20190820"
inDir2<-"../../DATA/20190815"
inDir3<-"../../DATA/20200401"
inDir4<-"../20201023_PreScreenData"
inDir5<-"../20200804_DummyPID"
if(!dir.exists(outDir)){dir.create(outDir)}

elisaFile1<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA 02_01MAY2019.csv")
elisaFile2<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA_27AUG2018.csv")
elisaFile3<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA3_29APR2019.csv")
pcrFile<-paste(sep="/",inDir2,"pcr_datatransfer.csv")
pcrCtFile<-paste(sep="/",inDir3,"PCRResults-Ct.update.csv")
preScreenFile<-paste(sep="/",inDir4,"RDT-PCR_complete.csv")

dummyPidFile<-paste(sep="/",inDir5,"dummyPID_realPID.csv")

outFileElisa<-paste(sep="",outDir,"/cryptofazElisaNormalised_",Sys.Date(),".csv")
outFileMergedData<-paste(sep="",outDir,"/cryptofazPcrElisaMerged_",Sys.Date(),".csv")

outPrefix<-paste(sep="",outDir,"/cryptofazElisaPcr_",Sys.Date())

#logFile<-paste(sep="",outDir,"/cryptofazLogFile_",gsub(Sys.Date(),pattern="-",replacement=""),".log")
logFile<-""

cat(file=logFile,append=F,paste(sep="","This is cryptofazELISA_normalisation20200318.Rmd.\n\nInput parameters:\n\telisaFile1 = < ",elisaFile1," >,\n\telisaFile2 = < ",elisaFile2," >,\n\telisafile3 = < ",elisaFile3," >,\n\tpcrFile = < ",pcrFile," >,\n\tpcrCtFile = < ",pcrCtFile," >,\n\tpreScreenFile = < ",preScreenFile,">,\n\toutFileElisa = < ",outFileElisa," >,\n\toutFileMergedData = < ",outFileMergedData," >.\n\n"))
## This is cryptofazELISA_normalisation20200318.Rmd.
## 
## Input parameters:
##  elisaFile1 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA 02_01MAY2019.csv >,
##  elisaFile2 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA_27AUG2018.csv >,
##  elisafile3 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA3_29APR2019.csv >,
##  pcrFile = < ../../DATA/20190815/pcr_datatransfer.csv >,
##  pcrCtFile = < ../../DATA/20200401/PCRResults-Ct.update.csv >,
##  preScreenFile = < ../20201023_PreScreenData/RDT-PCR_complete.csv>,
##  outFileElisa = < ../RESULTS/20201023/cryptofazElisaNormalised_2020-10-23.csv >,
##  outFileMergedData = < ../RESULTS/20201023/cryptofazPcrElisaMerged_2020-10-23.csv >.
# read the data
eli1<-read.csv(elisaFile1,stringsAsFactors=F)
eli2<-read.csv(elisaFile2,stringsAsFactors=F)
eli3<-read.csv(elisaFile3,stringsAsFactors=F)
pcr<-read.csv(pcrFile,stringsAsFactors=F)
pcrCt<-read.csv(pcrCtFile,stringsAsFactors=F)
preScreen<-read.csv(preScreenFile,stringsAsFactors=F)
dummyPID<-read.csv(dummyPidFile,stringsAsFactors=F)
cat(file=logFile,append=T,paste(sep="","Reading the data.\n",
                                "..ELISA file 1: ",nrow(eli1)," rows, ",ncol(eli1)," columns \n",
                                "..ELISA file 2: ",nrow(eli2)," rows, ",ncol(eli2)," columns \n",
                                "..ELISA file 3: ",nrow(eli3)," rows, ",ncol(eli3)," columns \n",
                                "..PCR file: ",nrow(pcr)," rows, ",ncol(pcr)," columns \n",
                                "..PCR Ct file: ",nrow(pcrCt)," rows, ",ncol(pcrCt)," columns \n",
                                "..Pre-screening file: ",nrow(preScreen)," rows, ",ncol(preScreen)," columns \n",
                                "..Dummy PID file: ",nrow(dummyPID)," rows, ",ncol(dummyPID)," columns \n",
                                "Done.\n\n"))
## Reading the data.
## ..ELISA file 1: 43 rows, 5 columns 
## ..ELISA file 2: 33 rows, 4 columns 
## ..ELISA file 3: 76 rows, 5 columns 
## ..PCR file: 190 rows, 34 columns 
## ..PCR Ct file: 428 rows, 7 columns 
## ..Pre-screening file: 582 rows, 7 columns 
## ..Dummy PID file: 22 rows, 2 columns 
## Done.
# fix the Ct values (recode 'Undertermined' values)
pcrCt$Ct[pcrCt$Ct=="Undetermined"]<-"40" # check with James and Darwin whether 'Undertermined' should be NA or a very high Ct value
pcrCt$Ct<-as.numeric(pcrCt$Ct)
pcrCt$Time.point<-gsub(pattern=" $",replacement="",pcrCt$Time.point)
pcrCt$Time.point<-gsub(pattern=" ",replacement="_",pcrCt$Time.point)
pcrCt$Time.point<-tolower(pcrCt$Time.point)
pcrCt<-pcrCt[is.element(el=pcrCt$Time.point,set=c("1st_am","only")),]
pcrCt$Lab.Sample.ID<-pcrCt$Sample.ID
pcrCt$Visit[pcrCt$Lab.Sample.ID=="CDE102F1"]<-6 # mnaually correcting the Visit number for this sample; see email from James Nyirenda 10/07/2020 that the visit was incorrectly recorded for this sample as it was run on a different experiment

# remove a pre-screening row with duplicated PATID and NA qPCR.Ct value
preScreen<-preScreen[!(preScreen$PATID=="CFZ0010019" & is.na(preScreen$qPCR.Ct)),]
preScreen$RDT.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$RDT.Result))
preScreen$qPCR.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$qPCR.Result))
preScreen$RDT.Result[preScreen$RDT.Result==""]<-NA
preScreen$qPCR.Result[preScreen$qPCR.Result==""]<-NA
preScreen$qPCR.Result32<-ifelse(preScreen$qPCR.Ct<32,"positive","negative")

# normalise
eli1$OD.norm<-(eli1$Optical.Density-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])/(eli1$Optical.Density[eli1$Sample.Lab.ID=="Positive Control"]-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])
eli2$OD.norm<-(eli2$OPTICAL.DENSITY-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])/(eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Positive Control"]-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])
eli3$OD.norm<-(eli3$Absorbance-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])/(eli3$Absorbance[eli3$Sample.ID=="Positive Control"]-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])
cat(file=logFile,append=T,"Normalising the ELISA OD data.\nDone.\n\n")
## Normalising the ELISA OD data.
## Done.
# combine the data
elisa<-data.frame(
  Participant.ID=c(eli1$Participant.ID,eli2$PARTICIPANT.ID,eli3$Sample.ID.1),
  Sample.ID=c(eli1$Sample.Lab.ID,eli2$SAMPLE.ID,eli3$Sample.ID),
  Visit=c(eli1$Visit,eli2$VISIT,eli3$Visit),
  Time.Point=c(eli1$Time.point,rep(NA,nrow(eli2)),eli3$Time.Point),
  Series=c(rep("S1",nrow(eli1)),rep("S2",nrow(eli2)),rep("S3",nrow(eli3))),
  OD=c(eli1$Optical.Density,eli2$OPTICAL.DENSITY,eli3$Absorbance),
  OD.norm=c(eli1$OD.norm,eli2$OD.norm,eli3$OD.norm)
)
elisa$Participant.ID<-as.character(elisa$Participant.ID)
elisa$Sample.ID<-as.character(elisa$Sample.ID)
elisa[elisa==""]<-NA
elisa[elisa=="N/A"]<-NA
elisa[elisa=="None"]<-NA
elisa$Participant.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Sample.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Participant.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])
elisa$Sample.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])

# Note the positive / negative result is done on the un-normalised ELISA data
# The threshold for dual channel is 0.09 (negative below, positive above), see DATA/20190820/pt5014insert_rev_0806.pdf
elisa$Result<-ifelse(elisa$OD>=0.09,"positive","negative")

elisa$Sample.ID.alt<-paste(sep=".Visit",elisa$Participant.ID,elisa$Visit)
pcr$Sample.ID<-paste(sep=".Visit",pcr$PATID,pcr$Visit)
pcrCt$Sample.ID<-paste(sep=".Visit",pcrCt$Participant.ID,pcrCt$Visit)
allSampIDs<-sort(unique(c(pcr$Sample.ID,pcrCt$Sample.ID,elisa$Sample.ID.Alt)))
dat<-cbind(pcr[match(allSampIDs,pcr$Sample.ID),],pcrCt[match(allSampIDs,pcrCt$Sample.ID),],elisa[match(allSampIDs,elisa$Sample.ID.alt),])
colnames(dat)<-c(paste(sep=".","PCR",colnames(pcr)),paste(sep=".","PCRCT",colnames(pcrCt)),paste(sep=".","ELISA",colnames(elisa)))

dat$PCRCT.Result<-factor(ifelse(dat$PCRCT.Ct<35,"positive","negative"),levels=c("positive","negative"))
dat$PCRCT.Result32<-factor(ifelse(dat$PCRCT.Ct<32,"positive","negative"),levels=c("positive","negative"))

cat(file=logFile,append=T,paste(sep="","Combining the ELISA data from the 3 series.\n",
                                "..output will have ",nrow(elisa)," rows, ",ncol(elisa)," columns.\n\n",
                                "Combining the ELISA, PCR and PCR Ct data.\n",
                                "..the combined data will consist of ",nrow(dat)," rows, ",ncol(dat)," columns.\n\n",
                                "Done.\n\n"))
## Combining the ELISA data from the 3 series.
## ..output will have 152 rows, 9 columns.
## 
## Combining the ELISA, PCR and PCR Ct data.
## ..the combined data will consist of 190 rows, 54 columns.
## 
## Done.
# add the ELISA positive / negative results to the pre-screening data
elisaTmp<-elisa[elisa$Visit=="-1",]
preScreen$ELISA.Result<-elisaTmp$Result[match(preScreen$PATID,elisaTmp$Participant.ID)]
preScreen$ELISA.OD<-elisaTmp$OD[match(preScreen$PATID,elisaTmp$Participant.ID)]
preScreen$ELISA.OD.norm<-elisaTmp$OD[match(preScreen$PATID,elisaTmp$Participant.ID)]
rm(elisaTmp)

# adding dummy PIDs
dat$dummyPID<-dummyPID$PCR.PATID2[match(dat$PCR.PATID,dummyPID$PCR.PATID)]

# writing output
write.csv(elisa,row.names=F,file=outFileElisa)
write.csv(dat,row.names=F,file=outFileMergedData)

cat(file=logFile,append=T,"Writing merged data to disk.\nDone.\n\n")
## Writing merged data to disk.
## Done.

TABLE X: Data summary table

sumStat<-function(dat,var,thr=NULL,thrLowHigh="lt"){
  N<-sum(!is.na(dat[,var]))
  perc<-100*N/nrow(dat)
  med<-median(dat[,var],na.rm=T)
  rng<-range(dat[,var],na.rm=T)
  
  res<-c(
    N,
    paste(sep="","(",format(nsmall=1,round(digits=1,perc)),"%)"),
    format(nsmall=2,round(digits=2,med)),
    paste(sep="","[",paste(collapse=",",format(nsmall=2,round(digits=2,rng))),"]")
  )
  
  if(!is.null(thr)){
    for(t in thr){
      if(thrLowHigh=="lt"){
        n<-sum(!is.na(dat[,var]) & dat[,var]<t)
      }else if(thrLowHigh=="leq"){
        n<-sum(!is.na(dat[,var]) & dat[,var]<=t)
      }else if(thrLowHigh=="gt"){
        n<-sum(!is.na(dat[,var]) & dat[,var]>t)
      }else if(thrLowHigh=="geq"){
        n<-sum(!is.na(dat[,var]) & dat[,var]>=t)
      }
      perc<-paste(sep="","(",format(nsmall=1,round(digits=1,100*n/N)),"%)")
      
      res<-c(res,n,perc)
    }
  }
  
  return(res)
}

tmpCt<-sumStat(dat=dat,var="PCRCT.Ct",thr=c(32,35),thrLowHigh="lt")
tmpLog2<-sumStat(dat=dat,var="PCR.log2_first")
tmpOD<-sumStat(dat=dat,var="ELISA.OD",thr=0.09,thrLowHigh="geq")
tmpODnorm<-sumStat(dat=dat,var="ELISA.OD.norm")

sumTab<-data.frame(
  assay=c(rep("qPCR",8),rep("ELISA",7)),
  measurement=c(rep("Ct",5),rep("log2(oocyst / gram)",3),rep("OD",4),rep("normalised OD",3)),
  groups=c(rep("",3),"Ct<32","Ct<35",rep("",6),"OD >= 0.09",rep("",3)),
  statistic=c("N (%)","Median","Range","n (% out of N)","n (% out of N)","N (%)","Median","Range","N (%)","Median","Range","n (% out of N)","N (%)","Median","Range"),
  value1=c(tmpCt[!grepl(tmpCt,pattern="%")],
          tmpLog2[!grepl(tmpLog2,pattern="%")],
          tmpOD[!grepl(tmpOD,pattern="%")],
          tmpODnorm[!grepl(tmpODnorm,pattern="%")]),
  value2=c(tmpCt[grepl(tmpCt,pattern="%")][1],"","",tmpCt[grepl(tmpCt,pattern="%")][2:3],tmpLog2[grepl(tmpLog2,pattern="%")],"","",tmpOD[grepl(tmpOD,pattern="%")][1],"","",tmpOD[grepl(tmpOD,pattern="%")][2],tmpODnorm[grepl(tmpODnorm,pattern="%")],"","")
)

sumTab<-sumTab[,-1] # first column wasn't needed when using kable_styling; quicker to just get rid of it like this rather than edit the above code

kable(sumTab,caption="Summaries of qPCR and ELISA measurements.",col.names=NULL) %>%
  kable_styling(full_width=F) %>%
  pack_rows("qPCR", 1, 8) %>%
  pack_rows("ELISA", 9, 15) %>%
  column_spec(1, bold = T) %>%
  collapse_rows(columns = 1, valign = "top")
Summaries of qPCR and ELISA measurements.
qPCR
Ct N (%) 180 (94.7%)
Median 29.10
Range [21.24,40.00]
Ct<32 n (% out of N) 147 (81.7%)
Ct<35 n (% out of N) 168 (93.3%)
log2(oocyst / gram) N (%) 180 (94.7%)
Median 14.03
Range [ 3.82,22.34]
ELISA
OD N (%) 141 (74.2%)
Median 0.00
Range [0.00,3.07]
OD >= 0.09 n (% out of N) 43 (30.5%)
normalised OD N (%) 141 (74.2%)
Median 0.00
Range [0.00,2.24]

Variability

Note that qPCR Ct values are right-censored with an upper limit of detection of \(Ct\geq40\) and ELISA OD values are left-censored with a lower limit of detection (\(OD<0.09\)). If means and variances are estimated directly from the data, then bias will be introduced. For this reason we use methods from survival analysis to take the censoring into account.

The most common method for this is estimation of the restricted mean and the associated standard error can be converted to an estimate of the standard deviation or variance. However this requires providing both the detection limit and the lowest (for ELISA OD) or highest (for qPCR Ct) possible value. For OD, this is simple: 0. For Ct, it is a little trickier, as it could be an infinite value.

Further, we would need to use unormalised ELISA OD and raw qPCR Ct values as the normalised ELISA OD and the log2 oocyst count would have different detection limits for different samples.

2 main choices here:

Note in both cases, for ELISA only positive samples would be used, while for qPCR both positive and negative (\(Ct\geq35\) but \(<40\)) would be used. Using raw / unnormalised data would have extra variation due to technical effects from the different experiments but would allow to account for the detection limit in a principled fashion. Using the log2 oocyst and normalised OD values would be rid of some of the technical variation but since different samples will have different detection limits, would make it difficult / impossible to account for the detection limits…

Overall coefficient of variation (CV)

pcr<-dat$PCR.log2_first[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct<40]
elisa<-dat$ELISA.OD.norm[!is.na(dat$ELISA.OD) & dat$ELISA.OD>=0.09]
  • qPCR log2 oocyst count per gram: overall mean = 14.17, overall sd = 3.47, overall CV = 0.24.

  • ELISA normalised OD: overall mean = 0.68, overall sd = 0.53, overall CV = 0.79.

Intra-individual CV

ttPCR<-dat %>%
  group_by(PCR.PATID) %>%
  summarise(mean=mean(PCR.log2_first[!is.na(PCRCT.Ct) & PCRCT.Ct<40],na.rm=T),sd=sd(PCR.log2_first[!is.na(PCRCT.Ct) & PCRCT.Ct<40],na.rm=T)) %>%
  mutate(CV=sd/mean)

#kable(ttPCR,caption="Mean, SD and CV of qPCR log2 oocyst count per gram values for each participant.")

The intra-individual CV of qPCR log2 oocyst count per gram is 0.17.

ttELISA<-dat %>%
  group_by(PCR.PATID) %>%
  summarise(mean=mean(ELISA.OD.norm[!is.na(ELISA.OD) & ELISA.OD>=0.09],na.rm=T),sd=sd(ELISA.OD.norm[!is.na(ELISA.OD) & ELISA.OD>=0.09],na.rm=T)) %>%
  mutate(CV=sd/abs(mean))

#kable(ttELISA,caption="Mean, SD and CV of ELISA normalised OD values for each participant.")

The intra-individual CV of ELISA normalised OD is 0.54.

Inter-individual CV

The mean of the participant qPCR log2 oocyst count means is 14.24, the SD of the participant qPCR log2 oocyst count means is 2.52 and the inter-individual CV of qPCR log2 oocyst count per gram is 0.18.

The mean of the participant ELISA OD means is 0.61, the SD of the participant ELISA OD means is 0.50 and the inter-individual CV of ELISA OD is 0.83.

Percentages of intra and inter SS out of total SS.

ssTotPCR<-var(dat$PCR.log2_first[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct<40],na.rm=T)*(sum(!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct<40)-1)
tmpDat<-dat %>%
  filter(!is.na(PCRCT.Ct) & PCRCT.Ct<40) %>%
  group_by(PCR.PATID) %>%
  summarise(mean=mean(PCR.log2_first,na.rm=T))
tmpDat<-data.frame(PATID=dat$PCR.PATID,PCRCT.Ct=dat$PCRCT.Ct,PCR.log2_first=dat$PCR.log2_first,meanOocyst=tmpDat$mean[match(dat$PCR.PATID,tmpDat$PCR.PATID)]) %>% filter(!is.na(PCRCT.Ct) & PCRCT.Ct<40)
tmpDat$diffSq=(tmpDat$PCR.log2_first-tmpDat$meanOocyst)^2
ssIntraPCR<-sum(tmpDat$diffSq,na.rm=T)
ssInterPCR<-sum((tmpDat$meanOocyst-mean(tmpDat$PCR.log2_first,na.rm=T))^2)

ssTotELISA<-var(dat$ELISA.OD.norm[!is.na(dat$ELISA.OD) & dat$ELISA.OD>=0.09],na.rm=T)*(sum(!is.na(dat$ELISA.OD) & dat$ELISA.OD>=0.09)-1)
tmpDat<-dat %>%
  filter(!is.na(dat$ELISA.OD) & ELISA.OD>=0.09) %>%
  group_by(PCR.PATID) %>%
  summarise(mean=mean(ELISA.OD.norm,na.rm=T))
tmpDat<-data.frame(PATID=dat$ELISA.OD.norm,ELISA.OD=dat$ELISA.OD,ELISA.OD.norm=dat$ELISA.OD.norm,meanOD=tmpDat$mean[match(dat$PCR.PATID,tmpDat$PCR.PATID)]) %>% filter(!is.na(dat$ELISA.OD) & ELISA.OD>=0.09)
tmpDat$diffSq=(tmpDat$ELISA.OD.norm-tmpDat$meanOD)^2
ssIntraELISA<-sum(tmpDat$diffSq,na.rm=T)
ssInterELISA<-sum((tmpDat$meanOD-mean(tmpDat$ELISA.OD.norm,na.rm=T))^2)

For qPCR Ct, 50.77% of the total variation is due to within participant variation and 49.23% is due to between participant variation.

For ELISA normalised OD 36.52% of the total variation is due to within participant variation and 63.48% is due to between participant variation.

Summary

Generally, there is more variability in ELISA normalised OD measurements than in log2 oocyst count per gram. For log2 oocyst count, inter and intra CVs are virtually the same whereas for ELISA normalised OD, the inter individual CV is larger than the intra individual CV. This last conclusion is also confirmed by looking at the sum of squares (SS) withing (intra) and between (inter) individuals: both are similar for qPCR, but the between SS is larger than the within individual SS.

Simple plots.

qPCR log2 oocyst per gram vs. ELISA normalised OD (first stool of the day).

# PCR log2(oocyst per gram in first stool) vs. ELISA normalised OD
g<-ggplot(data=dat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID)) +
  geom_point(cex=3) + 
  geom_line(lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
  scale_color_manual(values=c("steelblue","orange"))
print(g)

Time / spaghetti plots for log2 oocyst per gram and ELISA normalised OD.

# profiles over time for log2(oocyst per gram in first stool) and normalised OD
g1<-ggplot(data=dat,mapping=aes(x=PCR.Visit,y=PCR.log2_first,col=PCR.treatment,pch=PCR.PATID,group=PCR.PATID)) +
  geom_point() + 
  geom_line(data=dat[!is.na(dat$PCR.log2_first),]) +
  scale_shape_manual(values=seq(0,22)) +
  scale_color_manual(values=c("steelblue","orange")) +
  theme(legend.position="top")

datTmp<-dat[!is.na(dat$ELISA.OD.norm),]
g2<-ggplot(data=datTmp,mapping=aes(x=ELISA.Visit,y=ELISA.OD.norm,col=PCR.treatment,pch=ELISA.Participant.ID,group=ELISA.Participant.ID)) +
  geom_point() + 
  geom_line(na.rm=T) +
  scale_shape_manual(values=seq(0,22))+
  scale_color_manual(values=c("steelblue","orange")) +
  theme(legend.position="top")

grid.arrange(g1,g2,nrow=2)

Arithmetic mean and 95% confidence intervals over time per treatment group for qPCR log2 oocyst per gram and ELISA normalised OD.

plotMeanCIs<-function(datCFZ,datPLCB,varName,mTitle,logScale=F,xLabel="Study Day",yLabel,yLimits,days=c(-1,1:6,"19-24","41-55"),daysAlt=c(-1,1:8)){
  tmpMat<-matrix(nrow=4,ncol=length(days))
  if(!logScale){
    rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SE","Placebo_SE")
  }else{
    rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SElog","Placebo_SElog")
  }
  colnames(tmpMat)<-c(paste(sep="","day",days))
  
  for(j in 1:length(days)){
    if(!logScale){
      tmpMat["CFZ_mean",j]<-mean(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)
      tmpMat["CFZ_SE",j]<-sd(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
      tmpMat["Placebo_mean",j]<-mean(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)
      tmpMat["Placebo_SE",j]<-sd(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
    }else{
      tmpMat["CFZ_mean",j]<-exp(mean(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T))
      tmpMat["CFZ_SElog",j]<-sd(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
      tmpMat["Placebo_mean",j]<-exp(mean(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T))
      tmpMat["Placebo_SElog",j]<-sd(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
    }
  }
  
  plot(type="n",c(1,length(days)),yLimits,xlab=xLabel,ylab=yLabel,main=mTitle,axes=F)
  points(lty=2,col="steelblue",1:length(days),tmpMat["CFZ_mean",],cex=1.5)
  idxNoNA<-sort(which(!is.na(tmpMat["CFZ_mean",])))
  lines(lty=2,col="steelblue",(1:length(days))[idxNoNA],tmpMat["CFZ_mean",idxNoNA])
  points(lty=2,col="orange",1:length(days)+0.1,tmpMat["Placebo_mean",],pch=22,cex=1.5)
  idxNoNA<-sort(which(!is.na(tmpMat["Placebo_mean",])))
  lines(lty=2,col="orange",(1:length(days))[idxNoNA]+0.1,tmpMat["Placebo_mean",idxNoNA])
  for(j in 1:length(days)){
    qCfz<-qt(0.975,df=sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]))-1)
    qPlcb<-qt(0.975,df=sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]))-1)
    if(!logScale){
      segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue")
      segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue")
      segments(j-0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue")
      segments(j-0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue")
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange")
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange")
      segments(j+0.1-0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange")
      segments(j+0.1-0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange")
    }else{
      segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
      segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
      segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
      segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue")
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
      segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
      segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange")
    }
    
  }
  axis(side=1,at=1:length(days),labels=as.character(days))
  axis(side=2)
  box()
  legend(x="topright",col=c("steelblue","orange"),legend=c("CFZ","Placebo"),lty=1,pch=c(1,22),horiz=T,pt.cex=1.5)
}

plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="PCR.log2_first",mTitle="Mean log2 oocyst per gram over time.",yLabel="log2(oocyst per gram)",yLimits=range(dat$PCR.log2_first,na.rm=T))

plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm",mTitle="Mean normalised ELISA optical density over time.",yLabel="Normalised OD",yLimits=c(-0.25,1)) # can't work on the log scale as ELISA data has actual zeroes in it

dat$PCR.log2_first.cfb<-dat$PCR.log2_first
dat$ELISA.OD.norm.cfb<-dat$ELISA.OD.norm

for(pid in unique(dat$PCR.PATID)){
  baseLog2<-ifelse(!is.na(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1]),(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1]+dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1])/2,dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1])
  baseOD<-ifelse(!is.na(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1]),(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1]+dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1])/2,dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1])
  dat$PCR.log2_first.cfb[dat$PCR.PATID==pid]<-dat$PCR.log2_first[dat$PCR.PATID==pid]-baseLog2
  dat$ELISA.OD.norm.cfb[dat$PCR.PATID==pid]<-dat$ELISA.OD.norm[dat$PCR.PATID==pid]-baseOD
}

plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="PCR.log2_first.cfb",mTitle="Mean log2 oocyst per gram over time (change from baseline).",yLabel="CFB log2(oocyst per gram)",yLimits=c(-7,5),days=c(2:6,"19-24","41-55"),daysAlt=2:8)

plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm.cfb",mTitle="Mean CFB normalised ELISA optical density over time (change from baseline).",yLabel=" CFB Normalised OD",yLimits=c(-0.5,0.9),days=c(2:6,"19-24","41-55"),daysAlt=2:8) 

FIGURE 1: Box & jitter plots comparing qPCR Ct values according to ELISA positivity and treatment group.

Ct threshold of 35

# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value

# mixed model
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]

# mixed censored regression
pDat<-dat %>%
  filter(!is.na(ELISA.Result)) %>%
  mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
  pdata.frame(index=c("PCR.PATID","PCR.Visit"))

censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf

pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]

g<-dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  mutate(pcrElisa=factor(case_when(
    ELISA.Result=="positive" & PCRCT.Result=="positive"~"PcrPosElisaPos",
    ELISA.Result=="positive" & PCRCT.Result=="negative"~"PcrNegElisaPos",
    ELISA.Result=="negative" & PCRCT.Result=="positive"~"PcrPosElisaNeg",
    ELISA.Result=="negative" & PCRCT.Result=="negative"~"PcrNegElisaNeg"
  ),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrPosElisaPos"))) %>%
  ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
  geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
  scale_color_manual(values=c("steelblue4","steelblue1","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(+) ELISA(+)"),name="") +
  xlab("ELISA") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.75)) +
  theme_light() +
  theme(text=element_text(size=20),legend.position="bottom") +
  geom_abline(slope=0,intercept=35,lty=2,col="grey35",lwd=1.25) +
  ggtitle("qPCR Ct values according to ELISA positivity.") +
  geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
  geom_text(mapping=aes(x=0.55,y=35.5,label="Ct=35"),size=6,col="grey35")

print(g)

png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct35.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2
sumStat<-dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  group_by(ELISA.Result) %>%
  summarise(median=format(nsmall=2,round(digits=2,median(PCRCT.Ct))),IQR=paste(sep="","(",paste(collapse=",",format(nsmall=2,round(digits=2,quantile(PCRCT.Ct,probs=c(0.25,0.75))))),")"))

print(sumStat)
## # A tibble: 2 x 3
##   ELISA.Result median IQR          
##   <chr>        <chr>  <chr>        
## 1 negative     29.75  (27.61,31.79)
## 2 positive     26.26  (23.91,28.51)

Ct threshold of 32

# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value

# mixed model
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]

# mixed censored regression
pDat<-dat %>%
  filter(!is.na(ELISA.Result)) %>%
  mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
  pdata.frame(index=c("PCR.PATID","PCR.Visit"))

censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf

pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]

dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  mutate(pcrElisa=factor(case_when(
    ELISA.Result=="positive" & PCRCT.Result32=="positive"~"PcrPosElisaPos",
    ELISA.Result=="positive" & PCRCT.Result32=="negative"~"PcrNegElisaPos",
    ELISA.Result=="negative" & PCRCT.Result32=="positive"~"PcrPosElisaNeg",
    ELISA.Result=="negative" & PCRCT.Result32=="negative"~"PcrNegElisaNeg"
  ),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrNegElisaPos","PcrPosElisaPos"))) %>%
  ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
  geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
  scale_color_manual(values=c("steelblue4","steelblue1","orange4","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(-) ELISA(+)","PCR(+) ELISA(+)"),name="") +
  xlab("ELISA") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.75)) +
  theme_light() +
  theme(text=element_text(size=20),legend.position="bottom") +
  geom_abline(slope=0,intercept=32,lty=2,col="grey35",lwd=1.25) +
  ggtitle("qPCR Ct values according to ELISA positivity.") +
  geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
  geom_text(mapping=aes(x=0.55,y=32.5,label="Ct=32"),size=6,col="grey35")

print(g)

png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct32.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2

The p-value reported on the figures above is from a mixed censored regression model and is the p-value testing the null hypothesis that the fixed effect coefficient for the grouping variable (ELISA positivity) is zero. This was implemented using the function censReg() from the censReg package in R (Henningsen, A., censReg: Censored Regression (Tobit) Models, v0.5-30, https://CRAN.R-project.org/package=censReg, 2019).

Summary 2x2 table

Summarising the data in a 2x2 table (note this is only for summary; any usual contingency test based on this table will not be valid due to multiple observations per patient):

Ct threshold of 35

tt<-table(dat$PCRCT.Result,dat$ELISA.Result)
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"])
tt<-tt[2:1,]

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 2)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
negative positive
qPCR negative 9 0
positive 89 43

Ct threshold of 32

tt<-table(dat$PCRCT.Result32,dat$ELISA.Result)
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"])
tt<-tt[2:1,]

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 2)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
negative positive
qPCR negative 23 2
positive 75 41
#The same figure as above, but now with 3 columns, also separating according to qPCR positivity (note: no sample that are ELISA positive but qPCR negative):
# 
# dat<-dat %>% 
#   mutate(groupPcrElisa=case_when(
#     dat$ELISA.Result=="positive" & dat$PCRCT.Result=="positive"~"ELISA positive\nqPCR positive",
#     dat$ELISA.Result=="positive" & dat$PCRCT.Result=="negative"~"ELISA positive\nqPCR negative",
#     dat$ELISA.Result=="negative" & dat$PCRCT.Result=="positive"~"ELISA negative\nqPCR positive",
#     dat$ELISA.Result=="negative" & dat$PCRCT.Result=="negative"~"ELISA negative\nqPCR negative",
#   ))
# 
# dat %>%
#   filter(!is.na(groupPcrElisa) & dat$PCRCT.Ct<40) %>%
#   ggplot(aes(x=groupPcrElisa, y=PCRCT.Ct, col=groupPcrElisa)) + 
#   geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA) +
#   geom_jitter(position=position_jitter(0.2),size=3) +
#   scale_color_manual(values=c("steelblue4","steelblue","orange","yellow"),labels=NULL,breaks=NULL) +
#   xlab("") + 
#   ylab("qPCR Ct") +
#   ylim(c(20.5,40.25)) + 
#   theme(text=element_text(size=20)) +
#   ggtitle("qPCR Ct values according to qPCR and ELISA positivity.")

Below are similar figures, but splitting on treatment group.

g1<-dat %>%
  filter(!is.na(ELISA.Result) & ELISA.Result=="negative") %>%
  ggplot(aes(x=PCR.treatment, y=PCRCT.Ct, col=PCR.treatment)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA) +
  geom_jitter(position=position_jitter(0.2),size=3) +
  scale_color_manual(values=c("steelblue","steelblue2"),labels=NULL,breaks=NULL) +
  xlab("Treatment") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.25)) + 
  theme(text=element_text(size=20)) +
  ggtitle("qPCR Ct values according to treatment\n(ELISA negatives).")

g2<-dat %>%
  filter(!is.na(ELISA.Result) & ELISA.Result=="positive") %>%
  ggplot(aes(x=PCR.treatment, y=PCRCT.Ct, col=PCR.treatment)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA) +
  geom_jitter(position=position_jitter(0.2),size=3) +
  scale_color_manual(values=c("orange3","orange"),labels=NULL,breaks=NULL) +
  xlab("Treatment") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.25)) + 
  theme(text=element_text(size=20)) +
  ggtitle("qPCR Ct values according to treatment\n(ELISA positives).")

grid.arrange(g1,g2,nrow=1)

SUPPL. FIGURE 1: qPCR and ELISA positivity over time per patient.

qPCR Ct threshold of 35

plotqPCRELISATimeHeatmap<-function(plotDat,pid,xlab="",ylab="",ctThr=35){
  plotDat<-plotDat %>% 
    mutate(PCR.log2_first.rs = scales::rescale(PCR.log2_first,to=c(0.2,1)),
           ELISA.OD.norm.rs = scales::rescale(ELISA.OD.norm,to=c(0.2,1))) %>%
    filter(PCR.PATID==pid) %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Ct,PCRCT.Result,PCR.log2_first.rs,ELISA.OD.norm.rs,ELISA.Result)
  
  plotDat$PCRCT.Result<-factor(ifelse(plotDat$PCRCT.Ct<ctThr,"P","N"),levels=c("P","N"))
  plotDat$ELISA.Result<-factor(ifelse(plotDat$ELISA.Result=="positive","P","N"),levels=c("P","N"))
  
  plotDat$PCR.Visit<-factor(plotDat$PCR.Visit,levels=c("-1","1","2","3","4","5","6","7","8"))
  levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="7"]<-"19-24"
  levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="8"]<-"41-55"
  
  plotDatWide1<-plotDat %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCR.log2_first.rs,ELISA.OD.norm.rs) %>%
    pivot_longer(cols=c("PCR.log2_first.rs","ELISA.OD.norm.rs"),names_to="assay")
  plotDatWide1$assay<-factor(ifelse(plotDatWide1$assay=="PCR.log2_first.rs","qPCR","ELISA"))
  
  plotDatWide2<-plotDat %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Result,ELISA.Result) %>%
    pivot_longer(cols=c("PCRCT.Result","ELISA.Result"),names_to="assay")
  plotDatWide2$assay<-factor(ifelse(plotDatWide2$assay=="PCRCT.Result","qPCR","ELISA"))
  
  plotDatWide<-plotDatWide1
  plotDatWide$positivity<-plotDatWide2$value
  
  g1<-plotDatWide %>%
    group_by(assay) %>%
    complete(PCR.Visit) %>%
    ggplot(mapping=aes(y=assay,x=PCR.Visit,alpha=value,fill=assay)) +
    geom_tile(aes(alpha = value), color = "white") +
    scale_fill_manual(values=c("steelblue","orange"),drop=FALSE,na.value="gray") +
    coord_equal() +
    scale_alpha(na.value=0) +
    geom_text(mapping=aes(x=PCR.Visit,y=assay,label=positivity,alpha=1)) +
    theme(legend.position="none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.margin = unit(c(0.1,0.1,0.1,0), "pt"),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) +
    xlab(xlab) +
    ylab(ylab)
  
  datAnnot<-data.frame(
    x=rep(c(3,7,10),2),
    y=c(rep(2,3),rep(1,3)),
    label=c(pid,unique(plotDat$PCR.treatment[plotDat$PCR.PATID==pid & !is.na(plotDat$PCR.treatment)]),"qPCR","","","ELISA")
  )
  
  g2<-datAnnot %>%
    ggplot(mapping=aes(y=y,x=x)) +
    geom_tile(color = "white",alpha=0) +
    geom_text(mapping=aes(label=label)) +
    theme(legend.position="none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.margin = unit(c(0.1,0,0.1,0.1), "pt"),
          panel.background = element_blank(),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) +
    coord_equal()
  
  g<-plot_grid(g2,g1, align = "h", nrow = 1, rel_heights = rep(1,2))
  
  return(g)
}

g<-list()

datTmp <- dat %>%
  filter(dat$PCR.treatment=="CFZ")
datTmp$PCR.PATID<-factor(paste(sep="","CFZ",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=35)
}

datTmp <- dat %>%
  filter(dat$PCR.treatment=="Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID<-factor(datTmp$dummyPID)

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=35)
}

grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 35", gp=gpar(fontsize=18)))

png(file=paste(sep="",outPrefix,"_SupFig1_HeatMap_Ct35.png"),height=12,width=12,units="in",res=450)
grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 35", gp=gpar(fontsize=18)))
dev.off()
## quartz_off_screen 
##                 2

qPCR Ct threshold of 32

g<-list()

datTmp <- dat %>%
  filter(dat$PCR.treatment=="CFZ")
datTmp$PCR.PATID<-factor(paste(sep="","CFZ",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=32)
}

datTmp <- dat %>%
  filter(dat$PCR.treatment=="Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID<-factor(datTmp$dummyPID)

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=32)
}

grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 32", gp=gpar(fontsize=18)))

png(file=paste(sep="",outPrefix,"_SupFig1_HeatMap_Ct32.png"),height=12,width=12,units="in",res=450)
grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 32", gp=gpar(fontsize=18)))
dev.off()
## quartz_off_screen 
##                 2

Calculating concordance of ‘meaningful’ change between log oocyst and OD

substantialDelta<-function(m1,m2,type="sd",minChange=0.1,sdVal=1){
  # m1, m2 = measurements at day i and i+1 that are compared
  # type = one of 'sd' (need a change of at least minChnage*sd) or 'rel' (need a relative change of minChange from m1 to m2)
  # minChange = the minimum (positive or negative) difference needed to consider that m2 is meaningfully different from m1; if type=="sd", m2-m1 expressed as a multiple of the standard deviation; if type=="rel" the percentage change of m2 compared to m1
  # sd = standard deviation value to be used when type=="sd"
  
  deltaAbs<-m2-m1
  if(m1!=0){
    deltaRel<-(m2-m1)/m1
  }else{
    if(m2==0){deltaRel<-0}else{deltaRel<-Inf*sign(deltaAbs)}
  }
  
  res<-0
  if((type=="sd" & abs(deltaAbs)>=minChange*sdVal) | (type=="rel" & abs(deltaRel)>=minChange)){
    res<-1*sign(deltaAbs)
  }
  
  return(res)
}

concordFun<-function(dat){
  pids<-unique(dat$dummyPID)
  visits<-c(-1,1:8)
  
  sdPCR<-sd(dat$PCR.log2_first,na.rm=T)
  sdELISA<-sd(dat$ELISA.OD.norm,na.rm=T)
  
  resSd<-numeric(0)
  resRel<-numeric(0)
  
  # +2 == change in both, same direction
  # +1 == change PCR, no change ELISA
  # 0 == no change in either
  # -1 == no change PCR, change ELISA
  # -2 == both change, opposite direction
  
  for(p in pids){
    datTmp<-dat %>% filter(dummyPID==p & !is.na(PCR.Visit) & !is.na(ELISA.Visit))
    
    cur<-visits[1]
    for(i in 2:length(visits)){
      v<-visits[i]
      if(sum(datTmp$PCR.Visit==cur)==1 & sum(datTmp$ELISA.Visit==cur)==1 & sum(datTmp$PCR.Visit==v)==1 & sum(datTmp$ELISA.Visit==v)==1){
        if(!is.na(datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]) & !is.na(datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]) & !is.na(datTmp$PCR.log2_first[datTmp$PCR.Visit==v]) & !is.na(datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v])){
          # deltaPCR<-datTmp$PCR.log2_first[datTmp$PCR.Visit==v]-datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]
          # deltaELISA<-datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v]-datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]
          # 
          # relDeltaPCR<-deltaPCR/datTmp$PCR.log2_first[datTmp$PCR.Visit==cur]-1
          # relDeltaELISA<-deltaELISA/datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur]-1
          # 
          # deltaPCR<-deltaPCR/sdPCR
          # deltaELISA<-deltaELISA/sdELISA
          
          subChangePCRSd<-substantialDelta(m1=datTmp$PCR.log2_first[datTmp$PCR.Visit==cur],m2=datTmp$PCR.log2_first[datTmp$PCR.Visit==v],type="sd",sdVal=sdPCR,minChange=0.1)
          subChangePCRRel<-substantialDelta(m1=datTmp$PCR.log2_first[datTmp$PCR.Visit==cur],m2=datTmp$PCR.log2_first[datTmp$PCR.Visit==v],type="rel",minChange=0.1)
          
          subChangeELISASd<-substantialDelta(m1=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur],m2=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v],type="sd",sdVal=sdELISA,minChange=0.1)
          subChangeELISARel<-substantialDelta(m1=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==cur],m2=datTmp$ELISA.OD.norm[datTmp$ELISA.Visit==v],type="rel",minChange=0.1)
          
          if((subChangePCRSd==1 & subChangeELISASd==1) | (subChangePCRSd==-1 & subChangeELISASd==-1)){
            resSdTmp<-2
          }else if((subChangePCRSd==1 & subChangeELISASd==-1) | (subChangePCRSd==-1 & subChangeELISASd==1)){
            resSdTmp<-(-2)
          }else if(subChangePCRSd==0 & subChangeELISASd==0){
            resSdTmp<-0
          }else if(abs(subChangePCRSd)==1 & subChangeELISASd==0){
            resSdTmp<-1
          }else if(subChangePCRSd==0 & abs(subChangeELISASd)==1){
            resSdTmp<-(-1)
          }
          
          if((subChangePCRRel==1 & subChangeELISARel==1) | (subChangePCRRel==-1 & subChangeELISARel==-1)){
            resRelTmp<-2
          }else if((subChangePCRRel==1 & subChangeELISARel==-1) | (subChangePCRRel==-1 & subChangeELISARel==1)){
            resRelTmp<-(-2)
          }else if(subChangePCRRel==0 & subChangeELISARel==0){
            resRelTmp<-0
          }else if(abs(subChangePCRRel)==1 & subChangeELISARel==0){
            resRelTmp<-1
          }else if(subChangePCRRel==0 & abs(subChangeELISARel)==1){
            resRelTmp<-(-1)
          }
          
          resSd<-c(resSd,resSdTmp)
          resRel<-c(resRel,resRelTmp)
          
          cur<-v
        }
      }
    }
  }
  
  return(list(sd=resSd,rel=resRel))
}

res<-concordFun(dat)
resSd<-res$sd
resRel<-res$rel

print(table(resSd))
## resSd
## -2 -1  0  1  2 
## 13  3 10 63 30
print(table(resRel))
## resRel
## -2 -1  0  1  2 
## 26 33 11 10 39
N<-1e3
pRanSd<-rep(NA,N)
pRanRel<-rep(NA,N)

for(j in 1:N){
  # randomise
  idxAllNA<-which( is.na(dat$PCR.log2_first) & is.na(dat$ELISA.OD.norm) )
  
  idxComplete<-which( !is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm) )
  idxPcrNA<-which( is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm) )
  idxElisaNA<-which( !is.na(dat$PCR.log2_first) & is.na(dat$ELISA.OD.norm) )
  
  idxCompleteRan1<-sample(x=idxComplete,size=length(idxComplete),replace=F)
  idxPcrNARan1<-sample(x=idxPcrNA,size=length(idxPcrNA),replace=F)
  idxElisaNARan1<-sample(x=idxElisaNA,size=length(idxElisaNA),replace=F)
    
  idxCompleteRan2<-sample(x=idxComplete,size=length(idxComplete),replace=F)
  idxPcrNARan2<-sample(x=idxPcrNA,size=length(idxPcrNA),replace=F)
  idxElisaNARan2<-sample(x=idxElisaNA,size=length(idxElisaNA),replace=F)
  
  idx<-c(idxComplete,idxPcrNA,idxElisaNA,idxAllNA)
  idxRan1<-c(idxCompleteRan1,idxPcrNARan1,idxElisaNARan1,idxAllNA)
  idxRan2<-c(idxCompleteRan2,idxPcrNARan2,idxElisaNARan2,idxAllNA)

  datTmp<-dat
  datTmp$PCR.log2_first[idx]<-dat$PCR.log2_first[idxRan1]
  datTmp$ELISA.OD.norm[idx]<-dat$ELISA.OD.norm[idxRan2]
  
  
  # compute
  res<-concordFun(dat=datTmp)
  resSdTmp<-sum(res$sd==0 | res$sd==2)
  resRelTmp<-sum(res$rel==0 | res$rel==2)
  
  #check
  pRanSd[j]<-ifelse(resSdTmp>=sum(resSd==0 | resSd==2),1,0)
  pRanRel[j]<-ifelse(resRelTmp>=sum(resRel==0 | resRel==2),1,0)
}

We will use 2 definitions of meaningful or substantial change:

  1. change of at least 0.1 standard deviations
  2. relative increase / decrease from previous value of at least 10%.

The two tabulations below list how many times we see different types of concordant or discordant pairs of successive measurements:

Concordant codes:

  • +2 == change in both, same direction
  • 0 == no change in either

Discordant codes:

  • +1 == change PCR, no change ELISA
  • -1 == no change PCR, change ELISA
  • -2 == both change, opposite direction
pRandSd<-sum(pRanSd)/N
pRandRel<-sum(pRanRel)/N

Using definition (a)

There are 106 substantial changes in PCR log2 oocyst count and 46 substantial changes in ELISA normalised OD.

Of the 109 instances where at least one of PCR log2 oocyst count or ELISA normalised OD changed substantially, 30 instances were in the same direction (27.5%). We would expect 17.2% by random chance alone.

Of all 119 pairs of successive measurements that were considered, 40 were concordant (no change or change in the same direction; 33.6%, p-vaue for this happening by chance: 0.096).

Using definition (b)

There are 75 substantial changes in PCR log2 oocyst count and 98 substantial changes in ELISA normalised OD.

Of the 108 instances where at least one of PCR log2 oocyst count or ELISA normalised OD changed substantially, 39 instances were in the same direction (36.1%). We would expect 26% by random chance alone.

Of all 119 pairs of successive measurements that were considered, 50 were concordant (no change or change in the same direction; 42%, p-value for this happening by chance: 0.184).

Mixed effects regression spline fits to test for association between PCR log2 oocyst count and ELISA normalised OD

Natural cubic spline with 3 knots

# Q: use multiple imputation??

## prepare data
datTmp<-dat[,c("PCR.PATID","PCR.log2_first","ELISA.OD.norm")]
datTmp<-datTmp[rowSums(is.na(datTmp))==0,]

## fit model
modMixSpline<-lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=c(12,17,22)) + (1|PCR.PATID), data=datTmp, REML = TRUE)
#modMixSpline<-lme(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,df=4),random=~1|PCR.PATID, data=datTmp, method = "REML")
# 
# fn<-function(pars){
#   modTmp<-try(lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=pars) + (1|PCR.PATID), data=datTmp, REML = TRUE))
#   if(is.character(modTmp)){
#     return(1e6)
#   }else{
#     return(-as.numeric(summary(modTmp)$logLik))
#   }
# }
# 
# knotsOpt<-optim(par=c(12,17,22),fn=fn)

## fit null model (natural cubic spline)
modMixNull<-lmer(ELISA.OD.norm ~ 1 + (1|PCR.PATID), data=datTmp, REML = TRUE)

## do likelihood ratio test
modMixSplineML<-lmer(ELISA.OD.norm ~ 1 + ns(PCR.log2_first,knots=c(12,17,22)) + (1|PCR.PATID), data=datTmp, REML = FALSE)
modMixNullML<-lmer(ELISA.OD.norm ~ 1 + (1|PCR.PATID), data=datTmp, REML = FALSE)
lrtPval<-signif(digits=3,anova(modMixSplineML,modMixNullML)$`Pr(>Chisq)`[2])

## plot
newdat <- data.frame(PCR.log2_first=seq(0,25,length=100),ELISA.OD.norm=NA)
newdat$ELISA.OD.norm <- predict(modMixSpline,newdat,re.form=NA)
mm <- model.matrix(terms(modMixSpline),newdat)

# http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#predictions-andor-confidence-or-prediction-intervals-on-predictions
# https://stat.ethz.ch/pipermail/r-sig-mixed-models/2010q2/003585.html
# 
pvar1 <- diag(mm %*% tcrossprod(vcov(modMixSpline),mm))
tvar1 <- pvar1+VarCorr(modMixSpline)$PCR.PATID[1]  ## must be adapted for more complex models
cmult <- 1.96
newdat <- data.frame(
    newdat,
    plo = newdat$ELISA.OD.norm-cmult*sqrt(pvar1),
    phi = newdat$ELISA.OD.norm+cmult*sqrt(pvar1),
    tlo = newdat$ELISA.OD.norm-cmult*sqrt(tvar1),
    thi = newdat$ELISA.OD.norm+cmult*sqrt(tvar1)
)

g<-ggplot(data=newdat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm)) +
  geom_line(col="black",lwd=2) +
  geom_ribbon(aes(ymin=tlo, ymax=thi),col=NA,alpha=0.3,fill="darkgrey") +
  geom_point(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),data=dat,cex=3) + 
  geom_line(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
  scale_color_manual(values=c("steelblue","orange"),labels=c("CFZ","Placebo")) +
  labs(title=paste(sep="\n","Mixed effects, natural cubic spline (df=4; 3 knots) model.","Confidence band based on fixed-effects uncertainty + random-effects variance.",paste(sep="","Likelihood ratio test p-value for association: ",lrtPval,"."))) +
  xlab("log2 oocyst count per gram") +
  ylab("ELISA normalised OD") +
  theme(text=element_text(size=18))

print(g)

png(width=16,height=9,res=300,units="in",file="cubicSplineFit.png")
print(g)
dev.off()
## quartz_off_screen 
##                 2

Linear spline with 1 knot

## fit model (linear spline)
modMixSpline<-lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=c(10),degree=1) + (1|PCR.PATID), data=datTmp, REML = TRUE)
# fn<-function(pars){
#   modTmp<-try(lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=pars,degree=1) + (1|PCR.PATID), data=datTmp, REML = TRUE))
#   if(is.character(modTmp)){
#     return(1e6)
#   }else{
#     return(-as.numeric(summary(modTmp)$logLik))
#   }
# }
# 
# knotsOpt<-optim(par=c(10),fn=fn,method="Brent",lower=0,upper=22)

## do likelihood ratio test
modMixSplineML<-lmer(ELISA.OD.norm ~ 1 + bs(PCR.log2_first,knots=c(10),degree=1) + (1|PCR.PATID), data=datTmp, REML = FALSE)
lrtPval<-signif(digits=3,anova(modMixSplineML,modMixNullML)$`Pr(>Chisq)`[2])

## plot
newdat <- data.frame(PCR.log2_first=seq(0,25,length=100),ELISA.OD.norm=NA)
newdat$ELISA.OD.norm <- predict(modMixSpline,newdat,re.form=NA)
mm <- model.matrix(terms(modMixSpline),newdat)

# http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#predictions-andor-confidence-or-prediction-intervals-on-predictions
# https://stat.ethz.ch/pipermail/r-sig-mixed-models/2010q2/003585.html
# 
pvar1 <- diag(mm %*% tcrossprod(vcov(modMixSpline),mm))
tvar1 <- pvar1+VarCorr(modMixSpline)$PCR.PATID[1]  ## must be adapted for more complex models
cmult <- 1.96
newdat <- data.frame(
    newdat,
    plo = newdat$ELISA.OD.norm-cmult*sqrt(pvar1),
    phi = newdat$ELISA.OD.norm+cmult*sqrt(pvar1),
    tlo = newdat$ELISA.OD.norm-cmult*sqrt(tvar1),
    thi = newdat$ELISA.OD.norm+cmult*sqrt(tvar1)
)

g<-ggplot(data=newdat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm)) +
  geom_line(col="black",lwd=2) +
  geom_ribbon(aes(ymin=tlo, ymax=thi),col=NA,alpha=0.3,fill="darkgrey") +
  geom_point(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),data=dat,cex=3) + 
  geom_line(aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID),lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
  scale_color_manual(values=c("steelblue","orange"),labels=c("CFZ","Placebo")) +
  labs(title=paste(sep="\n","Mixed effects, linear spline (df=2; 1 knot) model.","Confidence band based on fixed-effects uncertainty + random-effects variance.",paste(sep="","Likelihood ratio test p-value for association: ",lrtPval,"."))) +
  xlab("log2 oocyst count per gram") +
  ylab("ELISA normalised OD") +
  theme(text=element_text(size=18))

print(g)

png(width=16,height=9,res=300,units="in",file="linearSplineFit.png")
print(g)
dev.off()
## quartz_off_screen 
##                 2

Repeated measures correlation between PCR log2 oocyst counts and ELISA normalised optical densities.

Note:

This is using the PCR data from the first stool of the day. Please confirm that this is also what the ELISA was run on.

Update: yes, confirmed.

\[\,\]

  1. For all observations with qPCR < 35.
# repeated measures correlation (PCR log2-oocyst per gram and ELISA normalised OD)
dat$PCR.PATID<-factor(dat$PCR.PATID)
dat2<-dat[dat$PCRCT.Ct<35,]
dat2$PCR.log2_first_rank<-rank(dat2$PCR.log2_first)
dat2$ELISA.OD.norm_rank<-rank(dat2$ELISA.OD.norm)

corPearsonRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first",measure2="ELISA.OD.norm",dataset=dat2)
corSpearmanRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first_rank",measure2="ELISA.OD.norm_rank",dataset=dat2)

Pearson correlation coefficient (repeated measures): 0.3753 (95% CI = [0.2014,0.5262], p = 0.000049).

Spearman correlation coefficient (rank-based / non-parametric, repeated measures): 0.1264 (95% CI = [-0.0374,0.2836], p = 0.127104).

\[\,\]

  1. For all observations with qPCR < 35 and ELISA normalised OD > 0.
# repeated measures correlation (PCR log2-oocyst per gram and ELISA normalised OD)
dat$PCR.PATID<-factor(dat$PCR.PATID)
dat2<-dat[dat$PCRCT.Ct<35 & dat$ELISA.OD.norm>0,]
dat2$PCR.log2_first_rank<-rank(dat2$PCR.log2_first)
dat2$ELISA.OD.norm_rank<-rank(dat2$ELISA.OD.norm)

corPearsonRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first",measure2="ELISA.OD.norm",dataset=dat2)
corSpearmanRM<-rmcorr(participant="PCR.PATID",measure1="PCR.log2_first_rank",measure2="ELISA.OD.norm_rank",dataset=dat2)

Pearson correlation coefficient (repeated measures): 0.3788 (95% CI = [0.1446,0.5728], p = 0.001861).

Spearman correlation coefficient (rank-based / non-parametric, repeated measures): 0.3854 (95% CI = [0.1522,0.578], p = 0.001522).

qPCR Ct values vs. qPCR derived log2 oocyst counts

Note that what looks like a single point at log2 oocyst per gram = 9.86, Ct=40 are in fact 4 samples:

CFZ0010025.Visit7, CFZ0010147.Visit4, CFZ0010147.Visit6, CFZ0010179.Visit6.

dat %>%
  filter(PCRCT.Ct<35) %>%
ggplot(mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,col=PCRCT.Experiment)) +
  geom_point(size=2) +
  #geom_smooth(se=F) +
  #geom_text(data=dat[dat$PCRCT.Ct>35 | (dat$PCRCT.Ct>30 & dat$PCR.log2_first>15) | (dat$PCRCT.Ct<29 & dat$PCR.log2_first<11.5),],mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,label=PCR.Sample.ID),nudge_x=0,nudge_y=0.35) +
  xlab("log2 oocyst per gram") +
  ylab("qPCR Ct value") +
  ggtitle("qPCR CT vs qPCR log2 oocyst per gram (first stool of the day).") +
  scale_color_manual(values=c("steelblue","salmon","orange","cyan","mediumorchid","darkgrey","brown","darkgreen","greenyellow","black"),name="PCR experiment") +
  theme(text=element_text(size=16))

These are all the potentially problematic samples:

datTmp<-dat[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct>35,c("PCR.Sample.ID","PCRCT.Lab.Sample.ID","PCR.log2_first","PCRCT.Ct")]
datTmp<-datTmp[order(datTmp$PCRCT.Ct),]
kable(datTmp,row.names=F) %>%
  kable_styling()
PCR.Sample.ID PCRCT.Lab.Sample.ID PCR.log2_first PCRCT.Ct
CFZ0011548.Visit7 CDL127F1 8.17047 35.260
CFZ0011019.Visit3 CDH131F1 7.38882 35.414
CFZ0010010.Visit5 CDJ102F1 8.44481 35.545
CFZ0010147.Visit5 CDJ117F1 5.61746 36.206
CFZ0010147.Visit7 CDL10MF1 5.50515 36.320
CFZ0010147.Visit1 CDA10XF1 5.05775 36.776
CFZ0011086.Visit8 CDM14WF1 8.31257 37.091
CFZ0010293.Visit8 CDM10XF1 3.81738 38.003
CFZ0010025.Visit7 CDL108F1 9.85720 40.000
CFZ0010147.Visit4 CDI10NF1 9.85720 40.000
CFZ0010147.Visit6 CDE10PF1 9.85720 40.000
CFZ0010179.Visit6 CDE11EF1 9.85720 40.000

FIGURE 2: Specificity & sensitivity (ROC curve) of ELISA (using normalised OD values) assuming PCR as gold standard and taking into account the longitudinal nature of the data.

compSensSpec<-function(dat,timeVar,goldStdVar,diagVar,goldStdThreshold,nEval=1000,logScale=F,logScaleOffSet=0){
  times<-unique(dat[,timeVar])
  res<-data.frame(threshold=rep(NA,nEval),sensitivity=rep(NA,nEval),specificity=rep(NA,nEval),PPV=rep(NA,nEval),NPV=rep(NA,nEval),YoudenJ=NA)
  
  dat<-dat[!is.na(dat[,goldStdVar]) & !is.na(dat[,diagVar]),]
  
  rangeDiag<-range(dat[,diagVar])
  if(logScale){
    diagThr<-exp(seq(log(rangeDiag[1]+logScaleOffSet),log(rangeDiag[2]+logScaleOffSet),length=nEval))-logScaleOffSet
  }else{
    diagThr<-seq(rangeDiag[1],rangeDiag[2],length=nEval)
  }
  res$threshold<-diagThr
  
  dat$goldStd<-ifelse(dat[,goldStdVar]<=goldStdThreshold,1,0)
  
  sensMat<-matrix(nrow=nEval,ncol=length(times))
  specMat<-sensMat
  ppvMat<-sensMat
  npvMat<-sensMat
  sensWeights<-sensMat
  specWeights<-sensMat
  ppvWeights<-sensMat
  npvWeights<-sensMat
  
  totPos<-sum(dat$goldStd==1)
  totNeg<-sum(dat$goldStd==0)
  
  for(i in 1:nEval){
    for(j in 1:length(times)){
      tmpGS<-dat$goldStd[dat[,timeVar]==times[j]]
      tmpDiag<-dat[dat[,timeVar]==times[j],diagVar]
      tmpDiag<-ifelse(tmpDiag>=diagThr[i],1,0)
      
      sensMat[i,j]<-sum(tmpGS==1 & tmpDiag==1)/sum(tmpGS==1)
      specMat[i,j]<-sum(tmpGS==0 & tmpDiag==0)/sum(tmpGS==0)
      ppvMat[i,j]<-sum(tmpGS==1 & tmpDiag==1)/sum(tmpDiag==1)
      npvMat[i,j]<-sum(tmpGS==0 & tmpDiag==0)/sum(tmpDiag==0)
      sensWeights[i,j]<-sum(tmpGS==1)/totPos
      specWeights[i,j]<-sum(tmpGS==0)/totNeg
      ppvWeights[i,j]<-sum(tmpDiag==1)/sum(dat[,diagVar]>=diagThr[i])
      npvWeights[i,j]<-sum(tmpDiag==0)/sum(dat[,diagVar]<diagThr[i])
    }
    
    res$sensitivity[i]<-sum((sensWeights[i,]*sensMat[i,])[!is.nan(sensMat[i,])])
    res$specificity[i]<-sum((specWeights[i,]*specMat[i,])[!is.nan(specMat[i,])])
    res$PPV[i]<-sum(ppvWeights[i,]*ppvMat[i,])
    res$NPV[i]<-sum(npvWeights[i,]*npvMat[i,])
  }
  
  res$YoudenJ<-res$sensitivity+res$specificity-1
  return(res)
}


sensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=35,nEval=1000,logScale=T,logScaleOffSet=0.001)
sensSpec32<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=32,nEval=1000,logScale=T,logScaleOffSet=0.001)
#sensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.OD.norm",goldStdThreshold=35,nEval=5000) # cannot do this: almost no negative samples according to PCR --> lots of divisions by 0

sens<-sensSpec35$sensitivity
dsens<-c(diff(sensSpec35$sensitivity),0)
dspec<-abs(c(diff(1-sensSpec35$specificity),0))
auc<-sum(sens*dspec) + sum(dsens*dspec)/2

maxJ<-max(sensSpec35$YoudenJ,na.rm=T)
tmpSensSpec<-sensSpec35[sensSpec35$YoudenJ==maxJ,]
bestSens<-unique(tmpSensSpec$sensitivity)
bestSpec<-unique(tmpSensSpec$specificity)
bestOD<-median(tmpSensSpec$threshold)

g<-ggplot(data=as.data.frame(sensSpec35),mapping=aes(x=1-specificity,y=sensitivity)) +
  geom_path(lwd=1.75,col="steelblue") +
  geom_point(size=1.75) +
  geom_abline(intercept=0,slope=1,lty=2,col="darkgrey",lwd=1) +
  ggtitle(paste("ROC curve for normalised ELISA OD vs. PCR as gold standard with threshold\nCt = 35 (AUC = ",round(auc,digits=2),").",sep="")) + 
  theme_light() +
  theme(text=element_text(size=18)) +
  geom_hline(mapping=aes(yintercept=bestSens),lwd=1.2,lty=3,col="orange") + 
  geom_vline(mapping=aes(xintercept=1-bestSpec),lwd=1.2,lty=3,col="orange") +
  geom_text(mapping=aes(x=0.033,y=0.5,label=paste(sep="","J = ",format(nsmall=2,round(digits=2,maxJ)))),col="orange",size=7)

print(g)

png(file=paste(sep="",outPrefix,"_Fig2_ROC_Ct35.png"),height=12,width=12,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2
sens<-sensSpec32$sensitivity
dsens<-c(diff(sensSpec32$sensitivity),0)
dspec<-abs(c(diff(1-sensSpec32$specificity),0))
auc<-sum(sens*dspec) + sum(dsens*dspec)/2

maxJ<-max(sensSpec32$YoudenJ,na.rm=T)
tmpSensSpec<-sensSpec32[sensSpec32$YoudenJ==maxJ,]
bestSens<-unique(tmpSensSpec$sensitivity)
bestSpec<-unique(tmpSensSpec$specificity)
bestOD<-median(tmpSensSpec$threshold)

g<-ggplot(data=as.data.frame(sensSpec32),mapping=aes(x=1-specificity,y=sensitivity)) +
  geom_path(lwd=1.75,col="steelblue") +
  geom_point(size=1.75) +
  geom_abline(intercept=0,slope=1,lty=2,col="darkgrey",lwd=1) +
  ggtitle(paste("ROC curve for normalised ELISA OD vs. PCR as gold standard with threshold\nCt = 32 (AUC = ",round(auc,digits=2),").",sep="")) + 
  theme_light() +
  theme(text=element_text(size=18)) +
  geom_hline(mapping=aes(yintercept=bestSens),lwd=1.2,lty=3,col="orange") + 
  geom_vline(mapping=aes(xintercept=1-bestSpec),lwd=1.2,lty=3,col="orange") +
  geom_text(mapping=aes(x=0.03,y=0.5,label=paste(sep="","J = ",format(nsmall=2,round(digits=2,maxJ)))),col="orange",size=7)

print(g)

png(file=paste(sep="",outPrefix,"_Fig2_ROC_Ct32.png"),height=12,width=12,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2

Note that the AUC above is for the actual area under the plotted curve. The AUC can be interpreted as the probability of a randomly chosen positive sample having a larger value in the evaluated diagnostic than a randomly chosen negative sample. Calculating the AUC this way directly, gives the same result.

(Though note that this latter calculation currently ignores the correlated / longitudinal nature of the data).

datNoNA<-dat[!is.na(dat$PCRCT.Ct) & !is.na(dat$ELISA.OD.norm),]
pos<-sample(datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct<=35],size=1e7,replace=T)
neg<-sample(datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct>35],size=1e7,replace=T)
probaAUC<-(sum(pos>neg) + sum(pos==neg)/2)/1e7

cat(paste(sep="","The probabilistic AUC (Ct threshold 35) is ",round(digits=2,probaAUC),".\n"))
## The probabilistic AUC (Ct threshold 35) is 0.65.
datNoNA<-dat[!is.na(dat$PCRCT.Ct) & !is.na(dat$ELISA.OD.norm),]
pos<-sample(datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct<=32],size=1e7,replace=T)
neg<-sample(datNoNA$ELISA.OD.norm[datNoNA$PCRCT.Ct>32],size=1e7,replace=T)
probaAUC<-(sum(pos>neg) + sum(pos==neg)/2)/1e7

cat(paste(sep="","The probabilistic AUC (Ct threshold 32) is ",round(digits=2,probaAUC),".\n"))
## The probabilistic AUC (Ct threshold 32) is 0.69.

We can also calculate Youden’s J index. This can be interpreted as the probability of a positive result (i.e. result based on ELISA normalised OD being above the decision threshold) given that the sample is qPCR positive. Of specific interest are the maximum value of Youden’s J and what is the ELISA OD decision threshold associated with that.

In our case, the maximum Youden’s J is achieved for sensitivity 0.47 and specificity 0.92, corresponding to a median ELISA normalised OD threshold of 0.0035.

The maximum Youden index is indicated in orange on the ROC curve above.

Specificity, sensitivity, PPV and NPV of ELISA (using binary positive/negative output) assuming PCR as gold standard and taking into account the longitudinal nature of the data.

dat$ELISA.bin<-ifelse(dat$ELISA.OD>0.09,1,0)
ELISAbinSensSpec35<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=35,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
ELISAbinSensSpec32<-compSensSpec(dat=dat[!is.element(el=dat$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=32,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]

sensSpecPpvNpv<-data.frame(
  qPCR_Thr=c(32,35),
  sensitivity=c(ELISAbinSensSpec32$sensitivity,ELISAbinSensSpec35$sensitivity),
  specificity=c(ELISAbinSensSpec32$specificity,ELISAbinSensSpec35$specificity),
  PPV=c(ELISAbinSensSpec32$PPV,ELISAbinSensSpec35$PPV),
  NPV=c(ELISAbinSensSpec32$NPV,ELISAbinSensSpec35$NPV)
)

B<-1e5
sensSpecPpvNpvBS<-list()
for(b in 1:B){
  datTmp<-dat[sample(x=1:nrow(dat),size=nrow(dat),replace=T),]
  
  ELISAbinSensSpec35Tmp<-compSensSpec(dat=datTmp[!is.element(el=datTmp$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=35,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
  ELISAbinSensSpec32Tmp<-compSensSpec(dat=datTmp[!is.element(el=datTmp$PCR.Visit,set=c(3,5)),],timeVar="PCR.Visit",goldStdVar="PCRCT.Ct",diagVar="ELISA.bin",goldStdThreshold=32,nEval=3,logScale=F,logScaleOffSet=0)[2,2:5]
  
  sensSpecPpvNpvBS[[b]]<-data.frame(
    qPCR_Thr=c(32,35),
    sensitivity=c(ELISAbinSensSpec32Tmp$sensitivity,ELISAbinSensSpec35Tmp$sensitivity),
    specificity=c(ELISAbinSensSpec32Tmp$specificity,ELISAbinSensSpec35Tmp$specificity),
    PPV=c(ELISAbinSensSpec32Tmp$PPV,ELISAbinSensSpec35Tmp$PPV),
    NPV=c(ELISAbinSensSpec32Tmp$NPV,ELISAbinSensSpec35Tmp$NPV)
  )
}

sensSpecPpvNpvCI<-sensSpecPpvNpv
for(i in 1:nrow(sensSpecPpvNpvCI)){
  for(j in 2:ncol(sensSpecPpvNpvCI)){
    tt<-rep(NA,B)
    for(b in 1:B){tt[b]<-sensSpecPpvNpvBS[[b]][i,j]}
    ci<-quantile(tt,probs=c(0.025,0.975),na.rm=T)
    sensSpecPpvNpvCI[i,j]<-paste(sep="","(",format(nsmall=3,round(digits=3,ci[1])),",",format(nsmall=3,round(digits=3,ci[2])),")")
  }
}

kable(sensSpecPpvNpv,caption="Sensitivity, specificity, positive predictive value (PPV) and negative predictive value (PPV) for ELISA assuming qPCR as the gold standard.", col.names=c("qPCR Threshold","Sensitivity","Specificity","PPV","NPV"),digits=3) %>%
  kable_styling(full_width=F)
Sensitivity, specificity, positive predictive value (PPV) and negative predictive value (PPV) for ELISA assuming qPCR as the gold standard.
qPCR Threshold Sensitivity Specificity PPV NPV
32 0.353 0.92 0.953 0.235
35 0.326 1.00 1.000 0.092
kable(sensSpecPpvNpvCI,caption="Bootstrapped 95% confidence intervals for the sensitivity, specificity, positive predictive value (PPV) and negative predictive value (PPV) for ELISA assuming qPCR as the gold standard.", col.names=c("qPCR Threshold","Sensitivity","Specificity","PPV","NPV"),digits=3) %>%
  kable_styling(full_width=F)
Bootstrapped 95% confidence intervals for the sensitivity, specificity, positive predictive value (PPV) and negative predictive value (PPV) for ELISA assuming qPCR as the gold standard.
qPCR Threshold Sensitivity Specificity PPV NPV
32 (0.268,0.442) (0.800,1.000) (0.881,1.000) (0.154,0.322)
35 (0.248,0.407) (1.000,1.000) (1.000,1.000) (0.039,0.153)

Screening data analysis

First, the tables from Thomas Conrad’s presentation at the lake conference.

Note:

The table for RDT vs qPCR uses a qPCR threshold of 35 and it was not possible to generate the table for a threshold of 32. qPCR Ct values are only avilable for the enrolled subjects and hence I could not re-compute the qPCR positive/negative result for a new threshold from the data file that was shared with me.

TABLE 1: RDT (rows) vs qPCR (columns):

#table(preScreen$RDT.Result,preScreen$qPCR.Result,useNA="ifany")
tt<-table(preScreen$RDT.Result,preScreen$qPCR.Result)

tt<-data.frame(var1="RDT",RDT=rownames(tt),PCRneg=tt[,"negative"],PCRpos=tt[,"positive"])

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "qPCR" = 2)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
qPCR
negative positive
RDT negative 478 54
positive 0 21

SUPPL. TABLE X: RDT (rows) vs. ELISA (columns):

#table(preScreen$RDT.Result,preScreen$ELISA.Result,useNA="ifany")
tt<-table(preScreen$RDT.Result,preScreen$ELISA.Result)

tt<-data.frame(var1="RDT",RDT=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"])

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 2)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
negative positive
RDT negative 14 2
positive 2 5

Note that this is virtually the same as in Thomas’ presentation, but: * 5 more observations that are both RDT and qPCR negative than in Tom’s table (only 468 of these)

Latent class analysis:

tmpDat<-data.frame(
  rdt=ifelse(preScreen$RDT.Result=="positive",2,1),
  qpcr=ifelse(preScreen$qPCR.Result=="positive",2,1),
  elisa=ifelse(preScreen$ELISA.Result=="positive",2,1)
)

f=cbind(rdt,qpcr,elisa) ~ 1
poLCA(f, tmpDat, nclass = 2, verbose=F, na.rm=F, maxiter=1e5, nrep=5)
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $rdt
##           Pr(1) Pr(2)
## class 1:  1.000 0.000
## class 2:  0.328 0.672
## 
## $qpcr
##            Pr(1)  Pr(2)
## class 1:  0.9162 0.0838
## class 2:  0.0000 1.0000
## 
## $elisa
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.2857 0.7143
## 
## Estimated class population shares 
##  0.9446 0.0554 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.9604 0.0396 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 581 
## number of fully observed cases: 23 
## number of estimated parameters: 7 
## residual degrees of freedom: 0 
## maximum log-likelihood: -274.6922 
##  
## AIC(2): 563.3844
## BIC(2): 593.9376
## G^2(2): 80.48877 (Likelihood ratio/deviance statistic) 
## X^2(2): 134.7481 (Chi-square goodness of fit) 
## 

Exit R.

cat(file=logFile,append=T,"This is the end.\n")
## This is the end.